/* For detailed instructions, see README file */


/* Session options: echo source statements and macro expansion in the log,
   compress output data sets and reuse freed space, and give sorts the
   maximum available memory. */
options source macrogen symbolgen mprint;
options compress=yes reuse=yes sortsize=max;




/*=========================================*/
/* Path of library OP; OPRA_Pseudo and Rule_606 are read from it (edit the path). */
libname OP "INSERT PATH";
/* Full path of the log file written by this program (edit the path). */
%let LOG_PATH=INSERT PATH\Table10_11.log;
/* NOTE(review): DLCREATEDIR is switched on after the LIBNAME above; if it was
   meant to create that library directory it probably has to come first - confirm. */
options dlcreatedir;
%put &LOG_PATH;
/*=========================================*/

/* Send the SAS log to the file named in LOG_PATH, replacing any earlier copy. */
proc printto log="&LOG_PATH" new;
run;



/* Blank out footnote/title and suppress ODS output until the report step. */
footnote " ";
ods title " ";
ods graphics off;
ods noresults;
ods select none;
/*=========================================*/

/* Build OPTION_DATA from OP.OPRA_Pseudo.
   Keeps rows that have a usable DMM_IDENTITY (not missing, not "NO_D_Orderflow",
   not "NoDMM") with AUCTION_IND=1 and DMMP_IND=1, and adds YEAR_MONTH as the
   SAS date of the observation's month. */
DATA OPTION_DATA;
	SET OP.OPRA_Pseudo;
	MONTH = MONTH(DATE);
	YEAR = YEAR(DATE);

	/* Zero-pad the month so the text is always six digits (yyyymm).
	   Without the padding, months 1-9 produce five characters such as "20201",
	   which is not a yyyymm value for the YYMMN6. informat. */
	year_month = input(cats(year, put(month, z2.)), yymmn6.);
	format year_month yymmn6.;

	if DMM_IDENTITY ^= "NO_D_Orderflow" & MISSING(DMM_IDENTITY)=0 & DMM_IDENTITY ^= "NoDMM";

	IF AUCTION_IND=1;

	IF DMMP_IND=1;

	KEEP YEAR_MONTH EQ DMM_IDENTITY DATE AUCTION_IND PIMP_C S_VOL PFOF_IND;
RUN;





/* Shell of the base table that %LOU_DAILY appends to.
   Only variable attributes are declared here. Because the step has no SET,
   it writes one observation with every variable missing; rows with a missing
   DMM_IDENTITY are removed further down in the program. */
DATA Eq_lou_auc_pfof_daily_all_pfofs;
	FORMAT
		S_PIMP_C_LOU    BEST12.
		S_PIMP_C_LOU_2  BEST12.
		S_AUCT_INT_LOU  BEST12.
		EQ_LOU          BEST12.
		DMM_IDENTITY    $CHAR12.
		NUM_DMM         BEST12.
		DATE            DATE9.
		YEAR_MONTH      YYMMN6.
	;
RUN;


/* %LOU_DAILY(DATA, DMM_IDENTITY)
   DATA          input data set
   DMM_IDENTITY  identity value to leave out
   For every YEAR_MONTH / DATE, aggregates all rows of &DATA whose DMM_IDENTITY
   differs from &DMM_IDENTITY, tags the result with &DMM_IDENTITY and appends
   it to Eq_lou_auc_pfof_daily_all_pfofs. */
%MACRO LOU_DAILY(DATA,DMM_IDENTITY);
	PROC SQL;
		CREATE TABLE MEAN_EQ_TEMP AS
		SELECT
			MEAN(EQ)                     AS EQ_LOU,
			SUM(PIMP_C)                  AS S_PIMP_C_LOU,
			YEAR_MONTH,
			COUNT(DISTINCT DMM_IDENTITY) AS NUM_DMM,
			SUM(PIMP_C*S_VOL)            AS S_PIMP_C_LOU_2,
			/* a true comparison evaluates to 1, so this counts rows with AUCTION_IND=1 */
			SUM(AUCTION_IND = 1)         AS S_AUCT_INT_LOU,
			DATE
		FROM &DATA
		WHERE DMM_IDENTITY ^= "&DMM_IDENTITY"
		GROUP BY YEAR_MONTH, DATE;
	QUIT;

	/* Record which identity was left out of the aggregates above. */
	DATA MEAN_EQ_TEMP;
		SET MEAN_EQ_TEMP;
		DMM_IDENTITY = INPUT(SYMGET("DMM_IDENTITY"),$12.);
	RUN;

	PROC APPEND BASE=Eq_lou_auc_pfof_daily_all_pfofs DATA=MEAN_EQ_TEMP;
	RUN;
%MEND LOU_DAILY;




/* %SCANLOOP_First_Daily(SCANFILE, FIELD1)
   SCANFILE  data set to walk through, one observation per iteration
   FIELD1    variable in SCANFILE whose value is passed on
   For each observation of &SCANFILE, calls
   %LOU_daily(OPTION_DATA_ALL_PFOF, <value of &FIELD1>). */
%MACRO SCANLOOP_First_Daily(SCANFILE,FIELD1);
	/* Keep the work variables local so the caller's macro variables are not overwritten. */
	%LOCAL I RECCOUNT VAR1;

	/* Number of observations, taken from the data set header (no rows are read). */
	DATA _NULL_;
		IF 0 THEN SET &SCANFILE NOBS=X;
		/* SYMPUTX formats the number itself and strips blanks, avoiding the
		   implicit numeric-to-character conversion of CALL SYMPUT. */
		CALL SYMPUTX('RECCOUNT',X);
		STOP;
	RUN;

	%DO I=1 %TO &RECCOUNT;
		/* Fetch the value of &FIELD1 from observation &I only. */
		DATA _NULL_;
			SET &SCANFILE (FIRSTOBS=&I OBS=&I);
			CALL SYMPUTX('VAR1',&FIELD1);
			STOP;
		RUN;

		%LOU_daily(OPTION_DATA_ALL_PFOF,&VAR1)
	%END;
%MEND SCANLOOP_First_Daily;




/* Per YEAR_MONTH, tabulate the PCTSUM of Total_Contracts for each DMM_IDENTITY
   in OP.Rule_606. The tabulate output replaces the sorted copy in Ws_dollar. */
proc sort data=OP.Rule_606 out=Ws_dollar;
	by YEAR_MONTH;
run;

/* NOTE(review): later steps read a column called PCTSUM from Ws_dollar -
   confirm that this is the name TABULATE gives the statistic in OUT=. */
proc tabulate data=Ws_dollar out=Ws_dollar;
	by YEAR_MONTH;
	class DMM_IDENTITY;
	var Total_Contracts;
	tables DMM_IDENTITY * Total_Contracts * (pctsum);
run;
ods results;


/* Attach the monthly contract share from Ws_dollar to every OPTION_DATA row. */
proc sql;
	create table OPTION_DATA as
	select A.*,
	       C.PCTSUM as PCTSUM_CONTRACTS
	from OPTION_DATA as A
	left join Ws_dollar as C
		on  A.DMM_IDENTITY = C.DMM_IDENTITY
		and A.YEAR_MONTH   = C.YEAR_MONTH;
quit;

/* Keep only rows for which a contract share was found. */
data OPTION_DATA_ALL_PFOF;
	set OPTION_DATA;
	if not missing(PCTSUM_CONTRACTS);
run;


/* One row per DMM_IDENTITY drives the loop: each identity in turn is passed
   to %LOU_DAILY by %SCANLOOP_First_Daily. */
proc sort data=OPTION_DATA_ALL_PFOF out=INPUT nodupkey;
	by DMM_IDENTITY;
run;

%SCANLOOP_First_Daily(INPUT, DMM_IDENTITY);



/* Build REG_DATA from OP.OPRA_Pseudo with all variables retained.
   Keeps rows that have a usable DMM_IDENTITY (not missing, not "NO_D_Orderflow",
   not "NoDMM") with AUCTION_IND=1 and DMMP_IND=1, and adds YEAR_MONTH as the
   SAS date of the observation's month. */
DATA REG_DATA;
	SET OP.OPRA_Pseudo;
	MONTH = MONTH(DATE);
	YEAR = YEAR(DATE);

	/* Zero-pad the month so the text is always six digits (yyyymm).
	   Without the padding, months 1-9 produce five characters such as "20201",
	   which is not a yyyymm value for the YYMMN6. informat. */
	year_month = input(cats(year, put(month, z2.)), yymmn6.);
	format year_month yymmn6.;

	if DMM_IDENTITY ^= "NO_D_Orderflow" & MISSING(DMM_IDENTITY)=0 & DMM_IDENTITY ^= "NoDMM";

	IF AUCTION_IND=1;

	IF DMMP_IND=1;

RUN;



/* Daily aggregates per DMM_IDENTITY over rows of REG_DATA with AUCTION_IND=1:
   mean EQ, sum of PIMP_C*S_VOL, and the row count. */
proc sql;
	create table DAILY_EQ_AUC as
	select
		DMM_IDENTITY,
		mean(EQ)            as DAILY_EQ,
		sum(PIMP_C * S_VOL) as DAILY_PIMP_C_2,
		count(*)            as AUCTION_FREQ,
		DATE,
		YEAR_MONTH
	from REG_DATA
	where AUCTION_IND = 1
	group by DMM_IDENTITY, YEAR_MONTH, DATE;
quit;


/* Drop rows of the appended table that carry no DMM_IDENTITY. */
data Eq_lou_auc_pfof_daily_all_pfofs;
	set Eq_lou_auc_pfof_daily_all_pfofs;
	if not missing(DMM_IDENTITY);
run;
	


/* Within each DMM_IDENTITY x YEAR_MONTH group: first lag the daily measures
   by one observation, then take 25-observation moving sums / averages of the
   lagged series. */
proc sort data=DAILY_EQ_AUC;
	by DMM_IDENTITY YEAR_MONTH DATE;
run;

/* Step 1: one-observation lags. */
proc expand data=DAILY_EQ_AUC out=DAILY_EQ_AUC method=none;
	by DMM_IDENTITY YEAR_MONTH;
	convert DAILY_EQ       = LAG_DAILY_EQ       / method=none transout=(lag 1);
	convert DAILY_PIMP_C_2 = LAG_DAILY_PIMP_C_2 / method=none transout=(lag 1);
	convert AUCTION_FREQ   = LAG_AUCTION_FREQ   / method=none transout=(lag 1);
run;

/* Step 2: moving sums (MS_) and moving average (MA_) of the lagged series. */
proc expand data=DAILY_EQ_AUC out=DAILY_EQ_AUC method=none;
	by DMM_IDENTITY YEAR_MONTH;
	convert LAG_DAILY_PIMP_C_2 = MS_LAG_DAILY_PIMP_C_2 / method=none transout=(movsum 25);
	convert LAG_AUCTION_FREQ   = MS_LAG_AUCTION_FREQ   / method=none transout=(movsum 25);
	convert LAG_DAILY_EQ       = MA_LAG_DAILY_EQ       / method=none transout=(movave 25);
run;

proc sort data=DAILY_EQ_AUC;
	by DMM_IDENTITY YEAR_MONTH DATE;
run;

/* Same lag / moving-window treatment for the appended leave-one-out table,
   within each DMM_IDENTITY x YEAR_MONTH group. */
proc sort data=Eq_lou_auc_pfof_daily_all_pfofs;
	by DMM_IDENTITY YEAR_MONTH DATE;
run;

/* Step 1: one-observation lags. */
proc expand data=Eq_lou_auc_pfof_daily_all_pfofs out=Eq_lou_auc_pfof_daily_all_pfofs method=none;
	by DMM_IDENTITY YEAR_MONTH;
	convert S_PIMP_C_LOU_2 = LAG_S_PIMP_C_LOU_2 / method=none transout=(lag 1);
	convert S_AUCT_INT_LOU = LAG_S_AUCT_INT_LOU / method=none transout=(lag 1);
	convert EQ_LOU         = LAG_EQ_LOU         / method=none transout=(lag 1);
	convert NUM_DMM        = LAG_NUM_DMM        / method=none transout=(lag 1);
run;

/* Step 2: divide the lagged sums by the lagged count of distinct identities. */
data Eq_lou_auc_pfof_daily_all_pfofs;
	set Eq_lou_auc_pfof_daily_all_pfofs;
	A_LAG_S_PIMP_C_LOU_2 = LAG_S_PIMP_C_LOU_2 / LAG_NUM_DMM;
	A_LAG_S_AUCT_INT_LOU = LAG_S_AUCT_INT_LOU / LAG_NUM_DMM;
run;


proc sort data=Eq_lou_auc_pfof_daily_all_pfofs;
	by DMM_IDENTITY YEAR_MONTH DATE;
run;


/* Step 3: 25-observation moving sums (MS_) and moving average (MA_). */
proc expand data=Eq_lou_auc_pfof_daily_all_pfofs out=Eq_lou_auc_pfof_daily_all_pfofs method=none;
	by DMM_IDENTITY YEAR_MONTH;
	convert A_LAG_S_PIMP_C_LOU_2 = MS_LAG_S_PIMP_C_LOU_2 / method=none transout=(movsum 25);
	convert A_LAG_S_AUCT_INT_LOU = MS_LAG_S_AUCT_INT_LOU / method=none transout=(movsum 25);
	convert LAG_EQ_LOU           = MA_LAG_EQ_LOU         / method=none transout=(movave 25);
run;



/* Put the own (A) and leave-one-out (B) moving-window measures side by side,
   matched on DATE and DMM_IDENTITY. */
proc sql;
	create table DAILY_EQ_AUC as
	select
		A.DATE,
		A.DMM_IDENTITY,
		A.MS_LAG_DAILY_PIMP_C_2,
		A.MS_LAG_AUCTION_FREQ,
		A.MA_LAG_DAILY_EQ,
		B.MA_LAG_EQ_LOU,
		B.MS_LAG_S_PIMP_C_LOU_2,
		B.MS_LAG_S_AUCT_INT_LOU
	from DAILY_EQ_AUC as A
	left join Eq_lou_auc_pfof_daily_all_pfofs as B
		on  A.DATE         = B.DATE
		and A.DMM_IDENTITY = B.DMM_IDENTITY;
quit;


/* Rebuild Ws_dollar: per YEAR_MONTH, the PCTSUM of Total_Contracts for each
   DMM_IDENTITY in OP.Rule_606. */
proc sort data=OP.Rule_606 out=Ws_dollar;
	by YEAR_MONTH;
run;

/* NOTE(review): later steps read a column called PCTSUM from Ws_dollar -
   confirm that this is the name TABULATE gives the statistic in OUT=. */
proc tabulate data=Ws_dollar out=Ws_dollar;
	by YEAR_MONTH;
	class DMM_IDENTITY;
	var Total_Contracts;
	tables DMM_IDENTITY * Total_Contracts * (pctsum);
run;


/* Add YEAR, MONTH and YEAR_MONTH (SAS date of the month) to DAILY_EQ_AUC. */
DATA DAILY_EQ_AUC;
	SET DAILY_EQ_AUC;
	YEAR = YEAR(DATE);
	MONTH = MONTH(DATE);
	/* Zero-pad the month so the text is always six digits (yyyymm).
	   Without the padding, months 1-9 produce five characters such as "20201",
	   which is not a yyyymm value for the YYMMN6. informat. */
	YEAR_MONTH = INPUT(CATS(YEAR, PUT(MONTH, Z2.)), YYMMN6.);
	FORMAT YEAR_MONTH YYMMN6.;
RUN;

/* Assemble the regression data set: every REG_DATA row plus
     B - moving-window measures from DAILY_EQ_AUC (by DMM_IDENTITY and DATE),
     C - contract share from Ws_dollar           (by DMM_IDENTITY and YEAR_MONTH),
     D - Total_Contracts from OP.Rule_606        (by DMM_IDENTITY and YEAR_MONTH).
   The former second join to OP.Rule_606 (alias F) was removed: nothing was
   selected from it, so it could only duplicate rows whenever Rule_606 holds
   more than one row per DMM_IDENTITY x YEAR_MONTH. */
PROC SQL;
	CREATE TABLE REG_DATA_2_FULL AS
	SELECT
		A.*,
		B.MS_LAG_DAILY_PIMP_C_2,
		B.MS_LAG_AUCTION_FREQ,
		B.MA_LAG_EQ_LOU,
		B.MA_LAG_DAILY_EQ,
		B.MS_LAG_S_PIMP_C_LOU_2,
		B.MS_LAG_S_AUCT_INT_LOU,
		C.PCTSUM AS PCTSUM_CONTRACTS,
		D.Total_Contracts
	FROM REG_DATA A
	LEFT JOIN DAILY_EQ_AUC B
		ON A.DMM_IDENTITY=B.DMM_IDENTITY & A.DATE=B.DATE
	LEFT JOIN Ws_dollar C
		ON A.DMM_IDENTITY=C.DMM_IDENTITY & A.YEAR_MONTH=C.YEAR_MONTH
	LEFT JOIN OP.Rule_606 D
		ON A.DMM_IDENTITY=D.DMM_IDENTITY & A.YEAR_MONTH=D.YEAR_MONTH
	;
QUIT;




/* Build the regression input INPUT from REG_DATA_2_FULL, sorted by SYMBOL_ONLY.
   Adds the BELOW/ABOVE indicators that compare a DMM's own lagged measures
   with the leave-one-out ones, their interactions with PFOF_IND,
   LOG_CONTR_PUR, rescaled spreads, and CHARDATE (DATE as DATE9. text).
   Rows without PCTSUM_CONTRACTS are dropped. */
proc sort data=REG_DATA_2_FULL;
	by SYMBOL_ONLY;
run;

data INPUT;
	set REG_DATA_2_FULL;
	by SYMBOL_ONLY;

	if missing(PCTSUM_CONTRACTS) then delete;

	/* A SAS missing value compares lower than any number, so each indicator is
	   set to missing explicitly when either side of its comparison is missing. */
	if missing(MS_LAG_DAILY_PIMP_C_2) or missing(MS_LAG_S_PIMP_C_LOU_2) then PIMPR_BELOW = .;
	else if MS_LAG_DAILY_PIMP_C_2 < MS_LAG_S_PIMP_C_LOU_2 then PIMPR_BELOW = 1;
	else PIMPR_BELOW = 0;

	if missing(MS_LAG_AUCTION_FREQ) or missing(MS_LAG_S_AUCT_INT_LOU) then AUCTION_BELOW = .;
	else if MS_LAG_AUCTION_FREQ < MS_LAG_S_AUCT_INT_LOU then AUCTION_BELOW = 1;
	else AUCTION_BELOW = 0;

	/* Previously this mask was applied to AUCTION_BELOW instead of EQ_BELOW. */
	if missing(MA_LAG_DAILY_EQ) or missing(MA_LAG_EQ_LOU) then EQ_BELOW = .;
	else if MA_LAG_DAILY_EQ < MA_LAG_EQ_LOU then EQ_BELOW = 1;
	else EQ_BELOW = 0;

	/* Derived after the masking above so that they are missing whenever the
	   underlying indicator is missing. */
	PIMPR_BLOW_PFOF_INT_AUC    = PIMPR_BELOW * PFOF_IND;
	AUCTION_BELOW_PFOF_INT_AUC = AUCTION_BELOW * PFOF_IND;
	AUCTION_ABOVE              = 1 - AUCTION_BELOW;
	EQ_ABOVE                   = 1 - EQ_BELOW;
	EQ_ABOVE_PFOF_INT_AUC      = EQ_ABOVE * PFOF_IND;

	LOG_CONTR_PUR = LOG(1+Total_Contracts);

	STOCK_QS = STOCK_QS/100;

	QUOTEDSPREAD_C = QUOTEDSPREAD_C/100;

	CHARDATE = put(date,date9.);
run;





/* INPUT_D1: every listed variable minus its mean within SYMBOL_ONLY.
   The group means are remerged onto the detail rows by PROC SQL. */
proc sql;
	create table INPUT_D1 as
	select
		PFOF_IND                - mean(PFOF_IND)                as PFOF_IND,
		PCTSUM_CONTRACTS        - mean(PCTSUM_CONTRACTS)        as PCTSUM_CONTRACTS,
		EQ_ABOVE                - mean(EQ_ABOVE)                as EQ_ABOVE,
		PIMPR_BELOW             - mean(PIMPR_BELOW)             as PIMPR_BELOW,
		AUCTION_BELOW           - mean(AUCTION_BELOW)           as AUCTION_BELOW,
		Abs_Delta               - mean(Abs_Delta)               as Abs_Delta,
		STOCK_QS                - mean(STOCK_QS)                as STOCK_QS,
		INV_STOCK_MIDPOINT      - mean(INV_STOCK_MIDPOINT)      as INV_STOCK_MIDPOINT,
		QUOTEDSPREAD_C          - mean(QUOTEDSPREAD_C)          as QUOTEDSPREAD_C,
		GAMMA                   - mean(GAMMA)                   as GAMMA,
		VEGA                    - mean(VEGA)                    as VEGA,
		Buy_IND                 - mean(Buy_IND)                 as Buy_IND,
		INV_OPTION_MIDPOINT     - mean(INV_OPTION_MIDPOINT)     as INV_OPTION_MIDPOINT,
		CALL_IND                - mean(CALL_IND)                as CALL_IND,
		LOG_DAYS_EXP            - mean(LOG_DAYS_EXP)            as LOG_DAYS_EXP,
		LOG_S_VOL               - mean(LOG_S_VOL)               as LOG_S_VOL,
		TICK_CHANGE_IND         - mean(TICK_CHANGE_IND)         as TICK_CHANGE_IND,
		LOG_MCAP                - mean(LOG_MCAP)                as LOG_MCAP,
		EQ_ABOVE_PFOF_INT_AUC   - mean(EQ_ABOVE_PFOF_INT_AUC)   as EQ_ABOVE_PFOF_INT_AUC,
		PIMPR_BLOW_PFOF_INT_AUC - mean(PIMPR_BLOW_PFOF_INT_AUC) as PIMPR_BLOW_PFOF_INT_AUC,
		EQ                      - mean(EQ)                      as EQ,
		PIMP_C                  - mean(PIMP_C)                  as PIMP_C,
		CHARDATE,
		SYMBOL_ONLY
	from INPUT
	group by SYMBOL_ONLY;
quit;



/* INPUT_D2: the SYMBOL_ONLY-demeaned variables of INPUT_D1 minus their mean
   within CHARDATE. The group means are remerged onto the detail rows. */
proc sql;
	create table INPUT_D2 as
	select
		PFOF_IND                - mean(PFOF_IND)                as PFOF_IND,
		PCTSUM_CONTRACTS        - mean(PCTSUM_CONTRACTS)        as PCTSUM_CONTRACTS,
		EQ_ABOVE                - mean(EQ_ABOVE)                as EQ_ABOVE,
		PIMPR_BELOW             - mean(PIMPR_BELOW)             as PIMPR_BELOW,
		AUCTION_BELOW           - mean(AUCTION_BELOW)           as AUCTION_BELOW,
		Abs_Delta               - mean(Abs_Delta)               as Abs_Delta,
		STOCK_QS                - mean(STOCK_QS)                as STOCK_QS,
		INV_STOCK_MIDPOINT      - mean(INV_STOCK_MIDPOINT)      as INV_STOCK_MIDPOINT,
		QUOTEDSPREAD_C          - mean(QUOTEDSPREAD_C)          as QUOTEDSPREAD_C,
		GAMMA                   - mean(GAMMA)                   as GAMMA,
		VEGA                    - mean(VEGA)                    as VEGA,
		Buy_IND                 - mean(Buy_IND)                 as Buy_IND,
		INV_OPTION_MIDPOINT     - mean(INV_OPTION_MIDPOINT)     as INV_OPTION_MIDPOINT,
		CALL_IND                - mean(CALL_IND)                as CALL_IND,
		LOG_DAYS_EXP            - mean(LOG_DAYS_EXP)            as LOG_DAYS_EXP,
		LOG_S_VOL               - mean(LOG_S_VOL)               as LOG_S_VOL,
		TICK_CHANGE_IND         - mean(TICK_CHANGE_IND)         as TICK_CHANGE_IND,
		LOG_MCAP                - mean(LOG_MCAP)                as LOG_MCAP,
		EQ_ABOVE_PFOF_INT_AUC   - mean(EQ_ABOVE_PFOF_INT_AUC)   as EQ_ABOVE_PFOF_INT_AUC,
		PIMPR_BLOW_PFOF_INT_AUC - mean(PIMPR_BLOW_PFOF_INT_AUC) as PIMPR_BLOW_PFOF_INT_AUC,
		EQ                      - mean(EQ)                      as EQ,
		PIMP_C                  - mean(PIMP_C)                  as PIMP_C,
		CHARDATE,
		SYMBOL_ONLY
	from INPUT_D1
	group by CHARDATE;
quit;





/* ---- Regression 1: PFOF_IND on EQ_ABOVE and controls ----
   SURVEYREG on the demeaned data (INPUT_D2) supplies standard errors and
   p-values; GLM on INPUT (SYMBOL_ONLY absorbed, CHARDATE as class effect)
   supplies estimates, R-square and the observation count.
   The pieces are stacked into Regression_1. */
proc sort data=INPUT;
	by SYMBOL_ONLY;
run;



ods output ParameterEstimates=ParameterEstimates_clus;
proc surveyreg data=INPUT_D2;
	cluster SYMBOL_ONLY CHARDATE;
	model PFOF_IND =
		PCTSUM_CONTRACTS
		EQ_ABOVE
		Abs_Delta
		STOCK_QS
		INV_STOCK_MIDPOINT
		QUOTEDSPREAD_C
		GAMMA
		VEGA
		Buy_IND
		INV_OPTION_MIDPOINT
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		TICK_CHANGE_IND
		LOG_MCAP
		/ noint;
quit;
ods results;

/* Standard errors (Type "S"); rows whose parameter name contains one of the
   listed fragments are dropped. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus;
	rename StdErr = Value;
	Type = "S";
	if  find(parameter, "SYMBOL_ONLY") = 0
	and find(parameter, "CHARDATE")    = 0
	and find(parameter, "sender")      = 0
	and find(parameter, "SENDER")      = 0
	and find(parameter, "PFOF_Model")  = 0
	and find(parameter, "DIRECTED_OF") = 0;
	parameter = upcase(parameter);
run;

/* p-values from the same SURVEYREG output, filtered the same way. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;
	if  find(parameter, "SYMBOL_ONLY") = 0
	and find(parameter, "CHARDATE")    = 0
	and find(parameter, "sender")      = 0
	and find(parameter, "SENDER")      = 0
	and find(parameter, "PFOF_Model")  = 0
	and find(parameter, "DIRECTED_OF") = 0;
	parameter = upcase(parameter);
run;


ods output ParameterEstimates=ParameterEstimates;
ods output FitStatistics=FitStatistics;
ods output NObs=NObs;
proc glm namelen=64 data=INPUT;
	class CHARDATE;
	absorb SYMBOL_ONLY;
	model PFOF_IND =
		PCTSUM_CONTRACTS
		EQ_ABOVE
		Abs_Delta
		STOCK_QS
		INV_STOCK_MIDPOINT
		QUOTEDSPREAD_C
		GAMMA
		VEGA
		Buy_IND
		INV_OPTION_MIDPOINT
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		TICK_CHANGE_IND
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;

/* R-square as a single "Rsq" row of Type "E". The original name filters are
   omitted because the constant "Rsq" contains none of the fragments. */
data FitStatistics(keep=parameter value Type);
	set FitStatistics;
	rename RSquare = value;
	parameter = "Rsq";
	type = "E";
run;

data ParameterEstimates_s(keep=parameter value Type);
	set ParameterEstimates_s_clus;
run;

data ParameterEstimates_p(keep=parameter probt);
	set ParameterEstimates_p_clus;
run;

/* Point estimates (Type "E") from GLM, filtered like the SURVEYREG rows. */
data ParameterEstimates_e(keep=parameter value Type);
	set ParameterEstimates;
	rename Estimate = value;
	Type = "E";
	if  find(parameter, "SYMBOL_ONLY") = 0
	and find(parameter, "CHARDATE")    = 0
	and find(parameter, "sender")      = 0
	and find(parameter, "SENDER")      = 0
	and find(parameter, "PFOF_Model")  = 0
	and find(parameter, "DIRECTED_OF") = 0;
	parameter = upcase(parameter);
run;

/* Observations read, as a "NObs" row of Type "E". */
data nobs(keep=value type parameter);
	set nobs;
	type = "E";
	if label = "Number of Observations Read";
	parameter = "NObs";
	rename NObsRead = value;
run;

/* Stack the pieces; n is the display order of each parameter. */
data Regression_1;
	set ParameterEstimates_e ParameterEstimates_s FitStatistics nobs;
	select (PARAMETER);
		when ("EQ_ABOVE")               n = 1;
		when ("PIMPR_BELOW")            n = 2;
		when ("AUCTION_BELOW")          n = 3;
		when ("PFOF_IND")               n = 4;
		when ("AUC_PFOF")               n = 5;
		when ("PCTSUM_CONTRACTS")       n = 6;
		when ("CONTRACT_IND")           n = 7;
		when ("Abs_Delta", "ABS_DELTA") n = 8;
		when ("QUOTEDSPREAD_C")         n = 9;
		when ("GAMMA")                  n = 10;
		when ("VEGA")                   n = 11;
		when ("P_ABS_LAG_STOCK_RET")    n = 12;
		when ("STOCK_QS")               n = 13;
		when ("INV_OPTION_MIDPOINT")    n = 14;
		when ("INV_STOCK_MIDPOINT")     n = 15;
		when ("LOG_MCAP")               n = 16;
		when ("LOG_S_VOL")              n = 17;
		when ("CALL_IND")               n = 18;
		when ("LOG_DAYS_EXP")           n = 19;
		when ("TICK_CHANGE_IND")        n = 20;
		when ("BUY_IND")                n = 21;
		when ("Rsq")                    n = 22;
		when ("NObs")                   n = 23;
		otherwise;
	end;
run;

/* Attach the p-value of each parameter. */
proc sql;
	create table Regression_1 as
	select A.*, B.Probt
	from Regression_1 as A
	left join ParameterEstimates_p as B
		on A.parameter = B.parameter;
quit;









/* ---- Regression 2: PFOF_IND on PIMPR_BELOW and controls ----
   SURVEYREG on the demeaned data (INPUT_D2) supplies standard errors and
   p-values; GLM on INPUT (SYMBOL_ONLY absorbed, CHARDATE as class effect)
   supplies estimates, R-square and the observation count.
   The pieces are stacked into Regression_2. */
ods output ParameterEstimates=ParameterEstimates_clus;
proc surveyreg data=INPUT_D2;
	cluster SYMBOL_ONLY CHARDATE;
	model PFOF_IND =
		PCTSUM_CONTRACTS
		PIMPR_BELOW
		Abs_Delta
		STOCK_QS
		INV_STOCK_MIDPOINT
		QUOTEDSPREAD_C
		GAMMA
		VEGA
		Buy_IND
		INV_OPTION_MIDPOINT
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		TICK_CHANGE_IND
		LOG_MCAP
		/ noint;
quit;
ods results;

/* Standard errors (Type "S"); rows whose parameter name contains one of the
   listed fragments are dropped. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus;
	rename StdErr = Value;
	Type = "S";
	if  find(parameter, "SYMBOL_ONLY") = 0
	and find(parameter, "CHARDATE")    = 0
	and find(parameter, "sender")      = 0
	and find(parameter, "SENDER")      = 0
	and find(parameter, "PFOF_Model")  = 0
	and find(parameter, "DIRECTED_OF") = 0;
	parameter = upcase(parameter);
run;

/* p-values from the same SURVEYREG output, filtered the same way. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;
	if  find(parameter, "SYMBOL_ONLY") = 0
	and find(parameter, "CHARDATE")    = 0
	and find(parameter, "sender")      = 0
	and find(parameter, "SENDER")      = 0
	and find(parameter, "PFOF_Model")  = 0
	and find(parameter, "DIRECTED_OF") = 0;
	parameter = upcase(parameter);
run;


ods results;
ods output ParameterEstimates=ParameterEstimates;
ods output FitStatistics=FitStatistics;
ods output NObs=NObs;
proc glm namelen=64 data=INPUT;
	absorb SYMBOL_ONLY;
	class CHARDATE;
	model PFOF_IND =
		PCTSUM_CONTRACTS
		PIMPR_BELOW
		Abs_Delta
		STOCK_QS
		INV_STOCK_MIDPOINT
		QUOTEDSPREAD_C
		GAMMA
		VEGA
		Buy_IND
		INV_OPTION_MIDPOINT
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		TICK_CHANGE_IND
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;

/* R-square as a single "Rsq" row of Type "E". The original name filters are
   omitted because the constant "Rsq" contains none of the fragments. */
data FitStatistics(keep=parameter value Type);
	set FitStatistics;
	rename RSquare = value;
	parameter = "Rsq";
	type = "E";
run;

data ParameterEstimates_s(keep=parameter value Type);
	set ParameterEstimates_s_clus;
run;

data ParameterEstimates_p(keep=parameter probt);
	set ParameterEstimates_p_clus;
run;

/* Point estimates (Type "E") from GLM, filtered like the SURVEYREG rows. */
data ParameterEstimates_e(keep=parameter value Type);
	set ParameterEstimates;
	rename Estimate = value;
	Type = "E";
	if  find(parameter, "SYMBOL_ONLY") = 0
	and find(parameter, "CHARDATE")    = 0
	and find(parameter, "sender")      = 0
	and find(parameter, "SENDER")      = 0
	and find(parameter, "PFOF_Model")  = 0
	and find(parameter, "DIRECTED_OF") = 0;
	parameter = upcase(parameter);
run;

/* Observations read, as a "NObs" row of Type "E". */
data nobs(keep=value type parameter);
	set nobs;
	type = "E";
	if label = "Number of Observations Read";
	parameter = "NObs";
	rename NObsRead = value;
run;

/* Stack the pieces; n is the display order of each parameter. */
data Regression_2;
	set ParameterEstimates_e ParameterEstimates_s FitStatistics nobs;
	select (PARAMETER);
		when ("EQ_ABOVE")               n = 1;
		when ("PIMPR_BELOW")            n = 2;
		when ("AUCTION_BELOW")          n = 3;
		when ("PFOF_IND")               n = 4;
		when ("AUC_PFOF")               n = 5;
		when ("PCTSUM_CONTRACTS")       n = 6;
		when ("CONTRACT_IND")           n = 7;
		when ("Abs_Delta", "ABS_DELTA") n = 8;
		when ("QUOTEDSPREAD_C")         n = 9;
		when ("GAMMA")                  n = 10;
		when ("VEGA")                   n = 11;
		when ("P_ABS_LAG_STOCK_RET")    n = 12;
		when ("STOCK_QS")               n = 13;
		when ("INV_OPTION_MIDPOINT")    n = 14;
		when ("INV_STOCK_MIDPOINT")     n = 15;
		when ("LOG_MCAP")               n = 16;
		when ("LOG_S_VOL")              n = 17;
		when ("CALL_IND")               n = 18;
		when ("LOG_DAYS_EXP")           n = 19;
		when ("TICK_CHANGE_IND")        n = 20;
		when ("BUY_IND")                n = 21;
		when ("Rsq")                    n = 22;
		when ("NObs")                   n = 23;
		otherwise;
	end;
run;

/* Attach the p-value of each parameter. */
proc sql;
	create table Regression_2 as
	select A.*, B.Probt
	from Regression_2 as A
	left join ParameterEstimates_p as B
		on A.parameter = B.parameter;
quit;









/* ---- Regression 3: PFOF_IND on AUCTION_BELOW and controls ----
   SURVEYREG on the demeaned data (INPUT_D2) supplies standard errors and
   p-values; GLM on INPUT (SYMBOL_ONLY absorbed, CHARDATE as class effect)
   supplies estimates, R-square and the observation count.
   The pieces are stacked into Regression_3. */
ods output ParameterEstimates=ParameterEstimates_clus;
proc surveyreg data=INPUT_D2;
	cluster SYMBOL_ONLY CHARDATE;
	model PFOF_IND =
		PCTSUM_CONTRACTS
		AUCTION_BELOW
		Abs_Delta
		STOCK_QS
		INV_STOCK_MIDPOINT
		QUOTEDSPREAD_C
		GAMMA
		VEGA
		Buy_IND
		INV_OPTION_MIDPOINT
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		TICK_CHANGE_IND
		LOG_MCAP
		/ noint;
quit;
ods results;

/* Standard errors (Type "S"); rows whose parameter name contains one of the
   listed fragments are dropped. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus;
	rename StdErr = Value;
	Type = "S";
	if  find(parameter, "SYMBOL_ONLY") = 0
	and find(parameter, "CHARDATE")    = 0
	and find(parameter, "sender")      = 0
	and find(parameter, "SENDER")      = 0
	and find(parameter, "PFOF_Model")  = 0
	and find(parameter, "DIRECTED_OF") = 0;
	parameter = upcase(parameter);
run;

/* p-values from the same SURVEYREG output, filtered the same way. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;
	if  find(parameter, "SYMBOL_ONLY") = 0
	and find(parameter, "CHARDATE")    = 0
	and find(parameter, "sender")      = 0
	and find(parameter, "SENDER")      = 0
	and find(parameter, "PFOF_Model")  = 0
	and find(parameter, "DIRECTED_OF") = 0;
	parameter = upcase(parameter);
run;


ods output ParameterEstimates=ParameterEstimates;
ods output FitStatistics=FitStatistics;
ods output NObs=NObs;
proc glm namelen=64 data=INPUT;
	class CHARDATE;
	absorb SYMBOL_ONLY;
	model PFOF_IND =
		PCTSUM_CONTRACTS
		AUCTION_BELOW
		Abs_Delta
		STOCK_QS
		INV_STOCK_MIDPOINT
		QUOTEDSPREAD_C
		GAMMA
		VEGA
		Buy_IND
		INV_OPTION_MIDPOINT
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		TICK_CHANGE_IND
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;

/* R-square as a single "Rsq" row of Type "E". The original name filters are
   omitted because the constant "Rsq" contains none of the fragments. */
data FitStatistics(keep=parameter value Type);
	set FitStatistics;
	rename RSquare = value;
	parameter = "Rsq";
	type = "E";
run;

data ParameterEstimates_s(keep=parameter value Type);
	set ParameterEstimates_s_clus;
run;

data ParameterEstimates_p(keep=parameter probt);
	set ParameterEstimates_p_clus;
run;

/* Point estimates (Type "E") from GLM, filtered like the SURVEYREG rows. */
data ParameterEstimates_e(keep=parameter value Type);
	set ParameterEstimates;
	rename Estimate = value;
	Type = "E";
	if  find(parameter, "SYMBOL_ONLY") = 0
	and find(parameter, "CHARDATE")    = 0
	and find(parameter, "sender")      = 0
	and find(parameter, "SENDER")      = 0
	and find(parameter, "PFOF_Model")  = 0
	and find(parameter, "DIRECTED_OF") = 0;
	parameter = upcase(parameter);
run;

/* Observations read, as a "NObs" row of Type "E". */
data nobs(keep=value type parameter);
	set nobs;
	type = "E";
	if label = "Number of Observations Read";
	parameter = "NObs";
	rename NObsRead = value;
run;

/* Stack the pieces; n is the display order of each parameter. */
data Regression_3;
	set ParameterEstimates_e ParameterEstimates_s FitStatistics nobs;
	select (PARAMETER);
		when ("EQ_ABOVE")               n = 1;
		when ("PIMPR_BELOW")            n = 2;
		when ("AUCTION_BELOW")          n = 3;
		when ("PFOF_IND")               n = 4;
		when ("AUC_PFOF")               n = 5;
		when ("PCTSUM_CONTRACTS")       n = 6;
		when ("CONTRACT_IND")           n = 7;
		when ("Abs_Delta", "ABS_DELTA") n = 8;
		when ("QUOTEDSPREAD_C")         n = 9;
		when ("GAMMA")                  n = 10;
		when ("VEGA")                   n = 11;
		when ("P_ABS_LAG_STOCK_RET")    n = 12;
		when ("STOCK_QS")               n = 13;
		when ("INV_OPTION_MIDPOINT")    n = 14;
		when ("INV_STOCK_MIDPOINT")     n = 15;
		when ("LOG_MCAP")               n = 16;
		when ("LOG_S_VOL")              n = 17;
		when ("CALL_IND")               n = 18;
		when ("LOG_DAYS_EXP")           n = 19;
		when ("TICK_CHANGE_IND")        n = 20;
		when ("BUY_IND")                n = 21;
		when ("Rsq")                    n = 22;
		when ("NObs")                   n = 23;
		otherwise;
	end;
run;

/* Attach the p-value of each parameter. */
proc sql;
	create table Regression_3 as
	select A.*, B.Probt
	from Regression_3 as A
	left join ParameterEstimates_p as B
		on A.parameter = B.parameter;
quit;















/* TEMP: the distinct PARAMETER / TYPE / N keys that occur in any of the three
   regressions. It is the spine onto which each regression is joined. */
data TEMP;
	set Regression_1 Regression_2 Regression_3;
run;

proc sort data=TEMP nodupkey;
	by PARAMETER TYPE N;
run;



/* One row per key with the values of regressions 1-3 side by side
   (suffixes _R1, _R2, _R3). */
proc sql;
	create table Regression_Combined as
	select
		G.PARAMETER as PARAMETER,
		G.TYPE      as TYPE,
		G.N         as N,
		A.VALUE as VALUE_R3, A.PROBT as PROBT_R3, A.N as N_R3, A.PARAMETER as PARAMETER_R3, A.TYPE as TYPE_R3,
		B.VALUE as VALUE_R2, B.PROBT as PROBT_R2, B.N as N_R2, B.PARAMETER as PARAMETER_R2, B.TYPE as TYPE_R2,
		D.VALUE as VALUE_R1, D.PROBT as PROBT_R1, D.N as N_R1, D.PARAMETER as PARAMETER_R1, D.TYPE as TYPE_R1
	from TEMP as G
	left join Regression_1 as D
		on G.PARAMETER = D.PARAMETER and G.TYPE = D.TYPE
	left join Regression_2 as B
		on G.PARAMETER = B.PARAMETER and G.TYPE = B.TYPE
	left join Regression_3 as A
		on G.PARAMETER = A.PARAMETER and G.TYPE = A.TYPE;
quit;








/* Turn Regression_Combined into display rows:
     VARIABLE          row label derived from PARAMETER and TYPE
     VALUE_CHAR_R1-R3  formatted cell text for regressions 1-3
   Estimates (TYPE "E") get a significance suffix (\threeS, \twoS, \oneS for
   p < 0.01, 0.05, 0.1); standard errors (TYPE "S") are put in parentheses;
   the "RSq" and "NObs" rows are printed as plain 8.3 numbers.
   Fixes relative to the earlier version:
     - a p-value of exactly 0.1 matched no branch and left the cell blank;
       every p-value >= 0.1 now prints the plain estimate;
     - a missing p-value compared below 0.01 and received three stars;
       it now prints the plain estimate. */
DATA Regression_Combined;
	SET Regression_Combined;
	/* Same lengths the first PUT(...) assignments used to imply. */
	LENGTH VARIABLE $48 VALUE_CHAR_R1 VALUE_CHAR_R2 $28 VALUE_CHAR_R3 $48;

	PARAMETER = compress(PARAMETER);

	/* ---- Row labels ---- */
	IF TYPE = "E" THEN SELECT (PARAMETER);
		WHEN ("PFOF_IND")            VARIABLE = "PFOF Ind.";
		WHEN ("LOG_AUCTION_FREQ")    VARIABLE = "Daily DMM Auction Freq $_{t-1}";
		WHEN ("QS_OPTIONSCALED", "QUOTEDSPREAD_C", "QuotedSpread_C")
		                             VARIABLE = "Option QS";
		WHEN ("INV_STOCK_MIDPOINT")  VARIABLE = "1/Stock Midpoint";
		WHEN ("GAMMA")               VARIABLE = "GAMMA";
		WHEN ("PCTSUM_CONTRACTS")    VARIABLE = "Contracts Purchased (\%)";
		WHEN ("INV_OPTION_MIDPOINT") VARIABLE = "1/Option Midpoint";
		WHEN ("Abs_Delta", "ABS_DELTA")
		                             VARIABLE = "$\vert$Delta$\vert$";
		WHEN ("LOG_S_VOL")           VARIABLE = "Trade size";
		WHEN ("BUY_IND")             VARIABLE = "Buy";
		WHEN ("VEGA")                VARIABLE = "VEGA";
		WHEN ("STOCK_QS")            VARIABLE = "Stock QS";
		WHEN ("TICK_CHANGE_IND")     VARIABLE = "Tick Size";
		WHEN ("CALL_IND")            VARIABLE = "Call";
		WHEN ("LOG_DAYS_EXP")        VARIABLE = "Days-to-Exp";
		WHEN ("LOG_MCAP")            VARIABLE = "MCAP";
		WHEN ("DMMP_IND")            VARIABLE = "DMMP Ind.";
		WHEN ("AUC_PFOF")            VARIABLE = "Auction $\times$ PFOF Ind.";
		WHEN ("C_HC")                VARIABLE = "Initial HC";
		WHEN ("C_HC_Leeland")        VARIABLE = "Dynamic HC";
		WHEN ("Gamma_vol")           VARIABLE = "Gamma \times \sigma_{U}";
		WHEN ("EQ_ABOVE")            VARIABLE = "EQ Above$_{t-1}$";
		WHEN ("PIMPR_BELOW")         VARIABLE = "Price Improvement Below$_{t-1}$";
		WHEN ("AUCTION_BELOW")       VARIABLE = "Auction Frequency Below$_{t-1}$";
		OTHERWISE;
	END;
	ELSE IF TYPE = "S" THEN SELECT (PARAMETER);
		WHEN ("PFOF_IND")            VARIABLE = "StdPFOF Ind.";
		WHEN ("LOG_AUCTION_FREQ")    VARIABLE = "Daily DMM Auction Freq $_{t-1}";
		WHEN ("QS_OPTIONSCALED", "QUOTEDSPREAD_C", "QuotedSpread_C")
		                             VARIABLE = "StdOption QS";
		WHEN ("INV_STOCK_MIDPOINT")  VARIABLE = "Std1/Stock Midpoint";
		WHEN ("GAMMA")               VARIABLE = "StdGAMMA";
		WHEN ("PIMPR_BELOW")         VARIABLE = "StdPIMPR Below";
		WHEN ("PCTSUM_CONTRACTS")    VARIABLE = "StdContr. Purchased";
		WHEN ("INV_OPTION_MIDPOINT") VARIABLE = "Std/Option Midpoint";
		WHEN ("Abs_Delta", "ABS_DELTA")
		                             VARIABLE = "StdAbsDelta";
		WHEN ("LOG_S_VOL")           VARIABLE = "StdTrade size";
		WHEN ("BUY_IND")             VARIABLE = "StdBuy";
		WHEN ("VEGA")                VARIABLE = "STDVEGA";
		WHEN ("STOCK_QS")            VARIABLE = "Stock QS";
		WHEN ("TICK_CHANGE_IND")     VARIABLE = "StdTickSize";
		WHEN ("CALL_IND")            VARIABLE = "StdCall";
		WHEN ("LOG_DAYS_EXP")        VARIABLE = "StdDays-to-Exp";
		WHEN ("LOG_MCAP")            VARIABLE = "StdMCAP";
		WHEN ("DMMP_IND")            VARIABLE = "StdDMMP Ind.";
		WHEN ("AUC_PFOF")            VARIABLE = "StdAuction $\times$ PFOF Ind.";
		WHEN ("C_HC")                VARIABLE = "StdInitial HC";
		WHEN ("C_HC_Leeland")        VARIABLE = "StdDynamic HC";
		WHEN ("Gamma_vol")           VARIABLE = "StdGamma \times \sigma_{U}";
		OTHERWISE;
	END;

	/* Summary rows are labelled regardless of TYPE. */
	IF PARAMETER = "Rsq"  THEN VARIABLE = "RSq";
	IF PARAMETER = "NObs" THEN VARIABLE = "NObs";

	/* ---- Cell text, one pass per regression ---- */
	ARRAY _val{3}   VALUE_R1 VALUE_R2 VALUE_R3;
	ARRAY _pv{3}    PROBT_R1 PROBT_R2 PROBT_R3;
	ARRAY _typ{3} $ TYPE_R1 TYPE_R2 TYPE_R3;
	ARRAY _txt{3} $ VALUE_CHAR_R1 VALUE_CHAR_R2 VALUE_CHAR_R3;

	_is_stat = (VARIABLE in ("RSq","NObs","Avg. Inv Mills - N.AUC","Avg. Inv Mills - AUC"));

	DO _k = 1 TO 3;
		/* Estimates: value plus significance suffix. */
		IF NOT _is_stat AND NOT MISSING(_val{_k}) AND _typ{_k} = "E" THEN DO;
			IF MISSING(_pv{_k})      THEN _txt{_k} = PUT(_val{_k}, 8.3);
			ELSE IF _pv{_k} < 0.01   THEN _txt{_k} = CATS(PUT(_val{_k}, 8.3), "\threeS");
			ELSE IF _pv{_k} < 0.05   THEN _txt{_k} = CATS(PUT(_val{_k}, 8.3), "\twoS");
			ELSE IF _pv{_k} < 0.1    THEN _txt{_k} = CATS(PUT(_val{_k}, 8.3), "\oneS");
			ELSE                          _txt{_k} = PUT(_val{_k}, 8.3);
		END;

		/* Standard errors in parentheses. */
		IF TYPE = "S" AND NOT MISSING(_val{_k}) THEN
			_txt{_k} = CATS("(", PUT(_val{_k}, 8.3), ")");

		/* Summary rows: plain number. */
		IF _is_stat THEN _txt{_k} = PUT(_val{_k}, 8.3);
	END;

	DROP _k _is_stat;
RUN;



/* Order rows by N then TYPE, and clear the label on the
   standard-error (TYPE "S") rows. */
proc sort data=Regression_Combined;
	by N TYPE;
run;

data Regression_Combined;
	set Regression_Combined;
	if TYPE = "S" then VARIABLE = " ";
run;



/* Two extra rows for the table: "Date FE" (N=22) and "Underlying FE" (N=24),
   both of TYPE "Z". Variable i is the row counter. */
data fixed_effects;
	do i = 1 to 2;
		if i = 1 then do;
			VARIABLE = PUT("Date FE",$48.);
			N = 22;
			TYPE = "Z";
		end;
		else do;
			VARIABLE = PUT("Underlying FE",$48.);
			N = 24;
			TYPE = "Z";
		end;
		output;
	end;
run;


/* Append the fixed-effect rows and mark them "Yes" in every regression
   column, then restore the N / TYPE ordering. */
data Regression_Combined;
	set Regression_Combined fixed_effects;

	if VARIABLE = "Date FE" or VARIABLE = "Underlying FE" then do;
		VALUE_CHAR_R3 = put("Yes",$28.);
		VALUE_CHAR_R2 = put("Yes",$28.);
		VALUE_CHAR_R1 = put("Yes",$28.);
	end;
run;



proc sort data=Regression_Combined;
	by N TYPE;
run;

/*=========================================*/
/* Print Table 10: switch ODS output back on, set title and footnote, list
   the label and the three formatted regression columns, then switch ODS
   output off again and blank the title/footnote.
   The title previously contained a garbled character after "DMMs"; it is
   now a plain apostrophe. */
ODS GRAPHICS OFF;
ODS RESULTS;
ODS SELECT ALL;
ods title "Table 10: PFOF LPM: DMMs' Execution Quality Comparison with Competitors
 (Pseudo Data)";
FOOTNOTE "This table presents results from a linear probability model for auctions at exchanges where
 the designated market maker (DMM) pays payment for orderflow (PFOF) Rule 606 DMMs
 using Rule 606 data and pseudo OPRA data.";
PROC REPORT data=Regression_Combined;
columns
variable 
value_char_R1
value_char_R2
value_char_R3
;


 DEFINE VARIABLE / DISPLAY "Variable" format =$168.;
   DEFINE value_char_R1  /DISPLAY "(2)" format =$168.;
   DEFINE value_char_R2  /DISPLAY "(3)" format =$168.;
   DEFINE value_char_R3  /DISPLAY "(4)" format =$168.;

run;
ODS GRAPHICS OFF;
ODS NORESULTS;
ODS SELECT NONE;
/*=========================================*/
FOOTNOTE " ";
ODS TITLE " ";





/*TABLE 11*/


/* Table 11, EQ column - clustered standard errors.
   INPUT is sorted by SYMBOL_ONLY here; the PROC GLM further down absorbs
   SYMBOL_ONLY from that data set. PROC SURVEYREG itself reads INPUT_D2.
   NOTE(review): INPUT_D2 is built outside this section - confirm it holds
   the same sample as INPUT. */
PROC SORT DATA=INPUT;
	BY SYMBOL_ONLY;
RUN;

/* Capture the coefficient table (estimates, std errors, p-values). */
ODS OUTPUT ParameterEstimates = ParameterEstimates_clus;
PROC SURVEYREG DATA=INPUT_D2;
	CLUSTER SYMBOL_ONLY CHARDATE;
	MODEL EQ =
		PFOF_IND
		PCTSUM_CONTRACTS
		EQ_ABOVE  EQ_ABOVE_PFOF_INT_AUC
		Abs_Delta  STOCK_QS  INV_STOCK_MIDPOINT
		GAMMA  VEGA  BUY_IND  INV_OPTION_MIDPOINT
		CALL_IND  LOG_DAYS_EXP  LOG_S_VOL
		TICK_CHANGE_IND  LOG_MCAP
		/ NOINT;
QUIT;
ODS RESULTS;





/* Split the SURVEYREG coefficient table into two data sets in one pass:
     ParameterEstimates_s_clus - standard errors (StdErr renamed to Value),
                                 tagged Type "S";
     ParameterEstimates_p_clus - p-values (probt).
   Rows whose parameter name contains any of the listed substrings are
   dropped, and the surviving parameter names are upper-cased. */
data ParameterEstimates_s_clus(keep=parameter value Type)
     ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;

	if find(parameter, "SYMBOL_ONLY") > 0
	   or find(parameter, "CHARDATE") > 0
	   or find(parameter, "sender") > 0
	   or find(parameter, "SENDER") > 0
	   or find(parameter, "PFOF_Model") > 0
	   or find(parameter, "DIRECTED_OF") > 0
	then delete;

	Type = "S";
	parameter = UPCASE(parameter);
	rename StdErr = Value;
run;


/* Table 11, EQ column - point estimates, R-square and observation counts.
   PROC GLM absorbs SYMBOL_ONLY and enters CHARDATE as a CLASS effect; the
   three ODS tables requested here feed the data steps that follow. */
ODS OUTPUT ParameterEstimates = ParameterEstimates
           FitStatistics      = FitStatistics
           NObs               = NObs;
PROC GLM NAMELEN=64 DATA=INPUT;
	CLASS CHARDATE;
	ABSORB SYMBOL_ONLY;
	MODEL EQ =
		PFOF_IND
		PCTSUM_CONTRACTS
		EQ_ABOVE  EQ_ABOVE_PFOF_INT_AUC
		Abs_Delta  STOCK_QS  INV_STOCK_MIDPOINT
		GAMMA  VEGA  BUY_IND  INV_OPTION_MIDPOINT
		CALL_IND  LOG_DAYS_EXP  LOG_S_VOL
		TICK_CHANGE_IND  LOG_MCAP
		CHARDATE
		/ SOLUTION NOINT;
RUN;
QUIT;




/* Reduce the GLM fit-statistics table to a single table row:
   parameter = "Rsq", Type = "E", value = RSquare.
   The substring filters that used to follow tested `parameter` after it
   had been set to the constant "Rsq", so they could never reject a row;
   they have been removed. */
data FitStatistics (keep=parameter value Type);
	set FitStatistics;

	rename RSquare = value;

	parameter = "Rsq";
	type = "E";
run;






/* Clustered standard errors and p-values are taken over unchanged from
   the SURVEYREG extracts. */
data ParameterEstimates_s;
	set ParameterEstimates_s_clus(keep=parameter value Type);
run;

data ParameterEstimates_p;
	set ParameterEstimates_p_clus(keep=parameter probt);
run;

/* Point estimates come from PROC GLM: Estimate is renamed to value, rows
   are tagged Type "E", rows whose parameter name contains any of the
   listed substrings are dropped, and the surviving names are upper-cased. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates;

	if find(parameter, "SYMBOL_ONLY") = 0
	   and find(parameter, "CHARDATE") = 0
	   and find(parameter, "sender") = 0
	   and find(parameter, "SENDER") = 0
	   and find(parameter, "PFOF_Model") = 0
	   and find(parameter, "DIRECTED_OF") = 0;

	Type = "E";
	parameter = UPCASE(parameter);
	rename Estimate = value;
run;



/* Keep only the "Number of Observations Read" line of the GLM NObs table
   and shape it like a coefficient row: parameter "NObs", type "E",
   value = NObsRead. */
data nobs(keep=value type parameter);
	set nobs;
	if label ne "Number of Observations Read" then delete;

	type = "E";
	parameter = "NObs";
	rename NObsRead = value;
run;





/* Assemble the EQ regression column: estimates, standard errors, R-square
   and observation count stacked together, each row given its display
   position n. Parameters not listed keep n missing. */
data Regression_1;
	set ParameterEstimates_e parameterEstimates_s FITSTATISTICS nobs;

	select (parameter);
		when ("PFOF_IND")                 n = 1;
		when ("EQ_ABOVE")                 n = 2;
		when ("EQ_ABOVE_PFOF_INT_AUC")    n = 3;
		when ("PIMPR_BELOW")              n = 4;
		when ("PIMPR_BLOW_PFOF_INT_AUC")  n = 5;
		when ("PCTSUM_CONTRACTS")         n = 6;
		when ("Abs_Delta", "ABS_DELTA")   n = 7;
		when ("GAMMA")                    n = 8;
		when ("VEGA")                     n = 9;
		when ("STOCK_QS")                 n = 10;
		when ("INV_OPTION_MIDPOINT")      n = 12;
		when ("INV_STOCK_MIDPOINT")       n = 13;
		when ("LOG_MCAP")                 n = 14;
		when ("LOG_S_VOL")                n = 15;
		when ("CALL_IND")                 n = 16;
		when ("LOG_DAYS_EXP")             n = 17;
		when ("TICK_CHANGE_IND")          n = 18;
		when ("BUY_IND")                  n = 19;
		when ("Rsq")                      n = 20;
		when ("NObs")                     n = 21;
		otherwise;
	end;
run;

/* Attach the clustered p-value to every row of the same parameter. */
proc sql;
	create table Regression_1 as
	select rows.*, pv.Probt
	from Regression_1 as rows
	     left join ParameterEstimates_p as pv
	     on rows.parameter = pv.parameter;
quit;









/* Table 11, Price Improvement column - clustered standard errors.
   Same layout as the EQ model, with PIMP_C as the dependent variable and
   the PIMPR_BELOW terms in place of the EQ_ABOVE terms. */
ODS RESULTS ON;
ODS OUTPUT ParameterEstimates = ParameterEstimates_clus;
PROC SURVEYREG DATA=INPUT_D2;
	CLUSTER SYMBOL_ONLY CHARDATE;
	MODEL PIMP_C =
		PFOF_IND
		PIMPR_BELOW  PIMPR_BLOW_PFOF_INT_AUC
		PCTSUM_CONTRACTS
		Abs_Delta  STOCK_QS  INV_STOCK_MIDPOINT
		GAMMA  VEGA  BUY_IND  INV_OPTION_MIDPOINT
		CALL_IND  LOG_DAYS_EXP  LOG_S_VOL
		TICK_CHANGE_IND  LOG_MCAP
		/ NOINT;
QUIT;






/* Split the SURVEYREG coefficient table of the PIMP_C model into
     ParameterEstimates_s_clus - standard errors (StdErr renamed to Value),
                                 tagged Type "S";
     ParameterEstimates_p_clus - p-values (probt).
   Rows whose parameter name contains any of the listed substrings are
   dropped, and the surviving parameter names are upper-cased. */
data ParameterEstimates_s_clus(keep=parameter value Type)
     ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;

	if find(parameter, "SYMBOL_ONLY") > 0
	   or find(parameter, "CHARDATE") > 0
	   or find(parameter, "sender") > 0
	   or find(parameter, "SENDER") > 0
	   or find(parameter, "PFOF_Model") > 0
	   or find(parameter, "DIRECTED_OF") > 0
	then delete;

	Type = "S";
	parameter = UPCASE(parameter);
	rename StdErr = Value;
run;








/* Table 11, Price Improvement column - point estimates, R-square and
   observation counts from PROC GLM with SYMBOL_ONLY absorbed and CHARDATE
   entered as a CLASS effect. */
ODS OUTPUT ParameterEstimates = ParameterEstimates
           FitStatistics      = FitStatistics
           NObs               = NObs;
PROC GLM NAMELEN=64 DATA=INPUT;
	ABSORB SYMBOL_ONLY;
	CLASS CHARDATE;
	MODEL PIMP_C =
		PFOF_IND
		PIMPR_BELOW  PIMPR_BLOW_PFOF_INT_AUC
		PCTSUM_CONTRACTS
		Abs_Delta  STOCK_QS  INV_STOCK_MIDPOINT
		GAMMA  VEGA  BUY_IND  INV_OPTION_MIDPOINT
		CALL_IND  LOG_DAYS_EXP  LOG_S_VOL
		TICK_CHANGE_IND  LOG_MCAP
		CHARDATE
		/ SOLUTION NOINT;
RUN;
QUIT;






/* Reduce the GLM fit-statistics table of the PIMP_C model to a single
   table row: parameter = "Rsq", Type = "E", value = RSquare.
   The substring filters that used to follow tested `parameter` after it
   had been set to the constant "Rsq", so they could never reject a row;
   they have been removed. */
data FitStatistics (keep=parameter value Type);
	set FitStatistics;

	rename RSquare = value;

	parameter = "Rsq";
	type = "E";
run;






/* Clustered standard errors and p-values are taken over unchanged from
   the SURVEYREG extracts of the PIMP_C model. */
data ParameterEstimates_s;
	set ParameterEstimates_s_clus(keep=parameter value Type);
run;

data ParameterEstimates_p;
	set ParameterEstimates_p_clus(keep=parameter probt);
run;

/* Point estimates come from PROC GLM: Estimate is renamed to value, rows
   are tagged Type "E", rows whose parameter name contains any of the
   listed substrings are dropped, and the surviving names are upper-cased. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates;

	if find(parameter, "SYMBOL_ONLY") = 0
	   and find(parameter, "CHARDATE") = 0
	   and find(parameter, "sender") = 0
	   and find(parameter, "SENDER") = 0
	   and find(parameter, "PFOF_Model") = 0
	   and find(parameter, "DIRECTED_OF") = 0;

	Type = "E";
	parameter = UPCASE(parameter);
	rename Estimate = value;
run;




/* Keep only the "Number of Observations Read" line of the GLM NObs table
   and shape it like a coefficient row: parameter "NObs", type "E",
   value = NObsRead. */
data nobs(keep=value type parameter);
	set nobs;
	if label ne "Number of Observations Read" then delete;

	type = "E";
	parameter = "NObs";
	rename NObsRead = value;
run;





/* Assemble the Price Improvement regression column: estimates, standard
   errors, R-square and observation count stacked together, each row given
   its display position n. Parameters not listed keep n missing. */
data Regression_2;
	set ParameterEstimates_e parameterEstimates_s FITSTATISTICS nobs;

	select (parameter);
		when ("PFOF_IND")                 n = 1;
		when ("EQ_ABOVE")                 n = 2;
		when ("EQ_ABOVE_PFOF_INT_AUC")    n = 3;
		when ("PIMPR_BELOW")              n = 4;
		when ("PIMPR_BLOW_PFOF_INT_AUC")  n = 5;
		when ("PCTSUM_CONTRACTS")         n = 6;
		when ("Abs_Delta", "ABS_DELTA")   n = 7;
		when ("GAMMA")                    n = 8;
		when ("VEGA")                     n = 9;
		when ("STOCK_QS")                 n = 10;
		when ("INV_OPTION_MIDPOINT")      n = 12;
		when ("INV_STOCK_MIDPOINT")       n = 13;
		when ("LOG_MCAP")                 n = 14;
		when ("LOG_S_VOL")                n = 15;
		when ("CALL_IND")                 n = 16;
		when ("LOG_DAYS_EXP")             n = 17;
		when ("TICK_CHANGE_IND")          n = 18;
		when ("BUY_IND")                  n = 19;
		when ("Rsq")                      n = 20;
		when ("NObs")                     n = 21;
		otherwise;
	end;
run;

/* Attach the clustered p-value to every row of the same parameter. */
proc sql;
	create table Regression_2 as
	select rows.*, pv.Probt
	from Regression_2 as rows
	     left join ParameterEstimates_p as pv
	     on rows.parameter = pv.parameter;
quit;







/* Build the row skeleton of Table 11: the union of (PARAMETER, TYPE, N)
   keys that occur in either regression, one row per key. */
DATA TEMP;
	SET REGRESSION_1 REGRESSION_2;
RUN;

PROC SORT DATA=TEMP NODUPKEY;
	BY PARAMETER TYPE N;
RUN;

/* Hang each regression's value/p-value on the skeleton, matching on
   PARAMETER and TYPE. Regression 1 feeds the *_R1 columns and
   regression 2 the *_R2 columns. */
PROC SQL;
	CREATE TABLE Regression_Combined AS
	SELECT
		skel.PARAMETER AS PARAMETER,
		skel.TYPE      AS TYPE,
		skel.N         AS N,
		r2.VALUE       AS VALUE_R2,
		r2.PROBT       AS PROBT_R2,
		r2.N           AS N_R2,
		r2.PARAMETER   AS PARAMETER_R2,
		r2.TYPE        AS TYPE_R2,
		r1.VALUE       AS VALUE_R1,
		r1.PROBT       AS PROBT_R1,
		r1.N           AS N_R1,
		r1.PARAMETER   AS PARAMETER_R1,
		r1.TYPE        AS TYPE_R1
	FROM TEMP AS skel
	     LEFT JOIN REGRESSION_1 AS r1
	       ON skel.PARAMETER = r1.PARAMETER AND skel.TYPE = r1.TYPE
	     LEFT JOIN REGRESSION_2 AS r2
	       ON skel.PARAMETER = r2.PARAMETER AND skel.TYPE = r2.TYPE;
QUIT;








/* Turn the numeric Table 11 rows into printable text.
     VARIABLE       - row label ($48), set on estimate rows (TYPE "E");
                      standard-error rows (TYPE "S") get a blank label.
     VALUE_CHAR_R1  - EQ regression cell ($28).
     VALUE_CHAR_R2  - Price Improvement regression cell ($28).
   Estimate cells carry a significance macro suffix (\threeS, \twoS, \oneS
   for p < 0.01, < 0.05, < 0.1); standard errors are wrapped in brackets;
   RSq and NObs are printed as plain numbers.

   Changes relative to the previous version:
     - "BUY_INDD" typo corrected to "BUY_IND".
     - A p-value of exactly 0.1 used to match no branch and left the cell
       blank; the final branch is now a plain ELSE.
     - A missing p-value compares below any number in SAS and would have
       been given three stars; it is now printed without stars.
     - The unstarred R1 cell used $CHAR18. while R2 used $CHAR28.; both
       now use $CHAR28.
     - The PIMPR_BLOW_PFOF_INT_AUC label had an odd number of "$" math
       delimiters; "\times" is now closed with its own "$". */
DATA Regression_Combined;
	SET Regression_Combined;
	PARAMETER = compress(PARAMETER);

	/* ---- Row labels ---- */
	IF PARAMETER="PIMPR_BELOW" & TYPE="E" then VARIABLE = PUT("Price Improvement Below$t-1$",$48.);
	IF PARAMETER="EQ_ABOVE" & TYPE="E" then VARIABLE = PUT("EQ Above$t-1$",$48.);
	IF PARAMETER="AUCTION_BELOW" & TYPE="E" then VARIABLE = PUT("Auction Frequency Below$t-1$",$48.);
	IF PARAMETER="PCTSUM_CONTRACTS" & TYPE="E" then VARIABLE = PUT("Contract Purchases (\%)",$48.);

	IF PARAMETER = "PFOF_IND" & TYPE="E" then VARIABLE = PUT("PFOF",$48.);
	IF PARAMETER = "PFOF_IND" & TYPE="S" then VARIABLE = PUT(" ",$48.);

	if PARAMETER="AUCTION_IND" & TYPE="E" then VARIABLE = PUT("Auction",$48.);
	if PARAMETER="AUCTION_IND" & TYPE="S" then VARIABLE = PUT(" ",$48.);

	if PARAMETER="QUOTEDSPREAD_C" & TYPE="E" then VARIABLE = PUT("Option Quoted Spread",$48.);
	if PARAMETER="QUOTEDSPREAD_C" & TYPE="S" then VARIABLE = PUT(" ",$48.);

	if PARAMETER="DMMP_IND" & TYPE="E" then VARIABLE = PUT("DMMP",$48.);
	if PARAMETER="DMMP_IND" & TYPE="S" then VARIABLE = PUT(" ",$48.);

	if PARAMETER="VEGA" & TYPE="E" then VARIABLE = PUT("Vega",$48.);
	if PARAMETER="VEGA" & TYPE="S" then VARIABLE = PUT(" ",$48.);

	IF PARAMETER="ABS_DELTA"  & TYPE="E" THEN VARIABLE = PUT("$\vert$Delta$\vert$",$48.);
	IF PARAMETER ="ABS_DELTA" & TYPE="S" THEN VARIABLE = PUT(" ",$48.);

	if PARAMETER="GAMMA" & TYPE="E" then VARIABLE = PUT("Gamma",$48.);
	if PARAMETER="GAMMA" & TYPE="S" then VARIABLE = PUT(" ",$48.);

	IF PARAMETER = "TICK_CHANGE_IND" & TYPE="E" THEN  VARIABLE = PUT("Tick Size",$48.);
	IF PARAMETER = "TICK_CHANGE_IND" & TYPE="S" THEN  VARIABLE = PUT(" ",$48.);

	IF PARAMETER = "INV_OPTION_MIDPOINT" & TYPE="E" THEN VARIABLE = PUT("1/Option Midpoint",$48.);
	IF PARAMETER = "INV_OPTION_MIDPOINT" & TYPE="S" THEN VARIABLE = PUT(" ",$48.);

	IF PARAMETER = "INV_STOCK_MIDPOINT" & TYPE="E" THEN VARIABLE = PUT("1/Underlying Midpoint",$48.);
	IF PARAMETER = "INV_STOCK_MIDPOINT" & TYPE="S" THEN VARIABLE = PUT(" ",$48.);

	IF PARAMETER = "CALL_IND" & TYPE="E" THEN  VARIABLE = PUT("Call",$48.);
	IF PARAMETER = "CALL_IND" & TYPE="S" THEN  VARIABLE = PUT(" ",$48.);

	IF PARAMETER = "STOCK_QS" & TYPE="E" THEN VARIABLE = PUT("Underlying Quoted Spr.",$48.);
	IF PARAMETER = "STOCK_QS" & TYPE="S" THEN VARIABLE = PUT(" ",$48.);

	IF PARAMETER = "LOG_S_VOL" & TYPE="E" THEN  VARIABLE = PUT("Trade size",$48.);
	IF PARAMETER = "LOG_S_VOL" & TYPE="S" THEN  VARIABLE = PUT(" ",$48.);

	IF PARAMETER = "BUY_IND" & TYPE="E" THEN  VARIABLE = PUT("Buy",$48.);
	/* Was "BUY_INDD", which never matched any parameter. */
	IF PARAMETER = "BUY_IND" & TYPE="S" THEN  VARIABLE = PUT(" ",$48.);

	/* "\times" now sits in its own closed math group so the "$" balance. */
	IF PARAMETER="PIMPR_BLOW_PFOF_INT_AUC" & TYPE="E" THEN  VARIABLE = PUT("PFOF$\times$ Price Improvement Below$t-1$ ",$48.);
	IF PARAMETER="EQ_ABOVE_PFOF_INT_AUC" & TYPE="E" THEN  VARIABLE = PUT("PFOF$\times EQ Above_{t-1}$ ",$48.);

	IF PARAMETER = "LOG_DAYS_EXP" & TYPE="E" THEN  VARIABLE = PUT("Days-to-Expiry",$48.);
	IF PARAMETER = "LOG_DAYS_EXP" & TYPE="S" THEN  VARIABLE = PUT(" ",$48.);

	IF PARAMETER = "LOG_MCAP" & TYPE="E" THEN  VARIABLE = PUT("Underlying MCAP",$48.);
	IF PARAMETER = "LOG_MCAP" & TYPE="S" THEN  VARIABLE = PUT(" ",$48.);

	IF PARAMETER = "Rsq" & TYPE="E" THEN  VARIABLE = PUT("RSq",$48.);

	IF PARAMETER = "NObs"  THEN  VARIABLE = PUT("NObs",$48.);

	/* ---- Estimate cells, regression 1 (EQ) ---- */
	if VARIABLE not in ("RSq","NObs") & NOT MISSING(Value_R1) & Type_R1="E" then do;
		/* Missing sorts below every number, so test it before the thresholds. */
		if MISSING(PROBT_R1) then VALUE_CHAR_R1=PUT(PUT(Value_R1,8.3),$CHAR28.);
		else if PROBT_R1<0.01 then VALUE_CHAR_R1=PUT(CATS(PUT(Value_R1,8.3),"\threeS"),$28.);
		else if PROBT_R1<0.05 then VALUE_CHAR_R1=PUT(CATS(PUT(Value_R1,8.3),"\twoS"),$28.);
		else if PROBT_R1<0.1 then VALUE_CHAR_R1=PUT(CATS(PUT(Value_R1,8.3),"\oneS"),$28.);
		else VALUE_CHAR_R1=PUT(PUT(Value_R1,8.3),$CHAR28.);
	end;

	/* ---- Estimate cells, regression 2 (Price Improvement) ---- */
	if VARIABLE not in ("RSq","NObs") & NOT MISSING(Value_R2) & Type_R2="E" then do;
		if MISSING(PROBT_R2) then VALUE_CHAR_R2=PUT(PUT(Value_R2,8.3),$CHAR28.);
		else if PROBT_R2<0.01 then VALUE_CHAR_R2=PUT(CATS(PUT(Value_R2,8.3),"\threeS"),$28.);
		else if PROBT_R2<0.05 then VALUE_CHAR_R2=PUT(CATS(PUT(Value_R2,8.3),"\twoS"),$28.);
		else if PROBT_R2<0.1 then VALUE_CHAR_R2=PUT(CATS(PUT(Value_R2,8.3),"\oneS"),$28.);
		else VALUE_CHAR_R2=PUT(PUT(Value_R2,8.3),$CHAR28.);
	end;

	/* ---- Standard-error cells ---- */
	if Type="S" then do;
		/*PUT STANDARD ERRORS IN BRACKETS*/
		IF NOT MISSING(Value_R1) THEN
			VALUE_CHAR_R1=PUT(CATS("(",PUT(Value_R1,8.3),")"),$28.);
		IF NOT MISSING(Value_R2) THEN
			VALUE_CHAR_R2=PUT(CATS("(",PUT(Value_R2,8.3),")"),$28.);
	end;

	/* ---- Summary rows: plain numbers, no stars ---- */
	if VARIABLE in ("RSq","NObs") then do;
		VALUE_CHAR_R1=PUT(PUT(Value_R1,8.3),$28.);
		VALUE_CHAR_R2=PUT(PUT(Value_R2,8.3),$28.);
	END;

RUN;



/* Put the rows in display order: N is the row position, and within one N
   the estimate row (TYPE "E") sorts ahead of its standard-error row ("S"). */
PROC SORT DATA=Regression_Combined;
	BY N TYPE;
RUN;

/* Standard-error rows are printed without a row label. */
DATA Regression_Combined;
	SET Regression_Combined;
	IF TYPE = "S" THEN DO;
		VARIABLE = " ";
	END;
RUN;



/* Two label-only rows that are appended to the bottom of Table 11:
   "Date FE" at position N=22 and "Underlying FE" at position N=24,
   both tagged TYPE "Z". The loop counter i is kept in the data set. */
data fixed_effects;
	do i = 1 to 2;
		if i = 1 then do;
			VARIABLE = PUT("Date FE",$48.);
			N = 22;
		end;
		else do;
			VARIABLE = PUT("Underlying FE",$48.);
			N = 24;
		end;
		TYPE = "Z";
		output;
	end;
run;


/* Stack the two fixed-effect rows under the regression rows and mark both
   model columns with "Yes" on those rows. */
DATA Regression_Combined;
	SET Regression_Combined fixed_effects;

	IF VARIABLE IN ("Date FE", "Underlying FE") THEN DO;
		VALUE_CHAR_R2 = put("Yes",$28.);
		VALUE_CHAR_R1 = put("Yes",$28.);
	END;
run;



/*=========================================*/
/* Table 11 output: re-enable ODS output, sort the rows into final display
   order, print the label column plus the two model columns, then switch
   ODS output off again and blank the title/footnote. */
ODS GRAPHICS OFF;
ODS RESULTS;
ODS SELECT ALL;

PROC SORT DATA=Regression_Combined;
	BY N TYPE;
RUN;

ods title "Table 11: PFOF Auctions and Competition (Pseudo Data)";
FOOTNOTE "This table presents regression results for PFOF effective spread to quoted spread (EQ),
 price improvement and competition for auctions at exchanges where the designated
 market maker (DMM) pays payment for orderflow (PFOF) using Rule 606 data and pseudo
 OPRA data.";

PROC REPORT DATA=Regression_Combined;
	COLUMNS variable value_char_R1 value_char_R2;

	DEFINE variable      / DISPLAY "Variable"          FORMAT=$168.;
	DEFINE value_char_R1 / DISPLAY "EQ"                FORMAT=$168.;
	DEFINE value_char_R2 / DISPLAY "Price Improvement" FORMAT=$168.;
RUN;

ODS GRAPHICS OFF;
ODS NORESULTS;
ODS SELECT NONE;
/*=========================================*/
FOOTNOTE " ";
ODS TITLE " ";



/* Point the SAS log back at its default destination, undoing the
   PROC PRINTTO redirect to &LOG_PATH made at the top of the program. */
PROC PRINTTO LOG=LOG;
RUN;





